Libraries
library(lattice)
library(ISLR)
library(MASS)
library(caret)
Loading required package: ggplot2
library(tidyverse)
[30m── [1mAttaching packages[22m ────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[30m[32m✔[30m [34mtibble [30m 2.0.1 [32m✔[30m [34mpurrr [30m 0.3.2
[32m✔[30m [34mtidyr [30m 0.8.3 [32m✔[30m [34mdplyr [30m 0.8.3
[32m✔[30m [34mreadr [30m 1.3.1 [32m✔[30m [34mstringr[30m 1.3.1
[32m✔[30m [34mtibble [30m 2.0.1 [32m✔[30m [34mforcats[30m 0.4.0[39m
[30m── [1mConflicts[22m ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()
[31m✖[30m [34mpurrr[30m::[32mlift()[30m masks [34mcaret[30m::lift()
[31m✖[30m [34mdplyr[30m::[32mselect()[30m masks [34mMASS[30m::select()[39m
library(rpart)
library(plyr); library(dplyr)
-------------------------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
-------------------------------------------------------------------------------------------------------------
Attaching package: ‘plyr’
The following objects are masked from ‘package:dplyr’:
arrange, count, desc, failwith, id, mutate, rename, summarise, summarize
The following object is masked from ‘package:purrr’:
compact
library(caret)
library(rattle) # Fancy tree plot
Rattle: A free graphical interface for data science with R.
Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(dplyr)
library(parallel)
library(Hmisc)
Loading required package: survival
Attaching package: ‘survival’
The following object is masked _by_ ‘.GlobalEnv’:
heart
The following object is masked from ‘package:rpart’:
solder
The following object is masked from ‘package:caret’:
cluster
Loading required package: Formula
Attaching package: ‘Hmisc’
The following objects are masked from ‘package:plyr’:
is.discrete, summarize
The following objects are masked from ‘package:dplyr’:
src, summarize
The following object is masked from ‘package:e1071’:
impute
The following objects are masked from ‘package:base’:
format.pval, units
library(e1071)
library(pROC)
Type 'citation("pROC")' for a citation.
Attaching package: ‘pROC’
The following objects are masked from ‘package:stats’:
cov, smooth, var
library(ggplot2)
library(rpart.plot)
library(VIM)
Loading required package: colorspace
Attaching package: ‘colorspace’
The following object is masked from ‘package:pROC’:
coords
Loading required package: grid
Loading required package: data.table
data.table 1.12.2 using 4 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: ‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
VIM is ready to use.
Since version 4.0.0 the GUI is in its own package VIMGUI.
Please use the package to use the new (and old) GUI.
Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
Attaching package: ‘VIM’
The following object is masked from ‘package:datasets’:
sleep
library(mice)
Attaching package: ‘mice’
The following object is masked from ‘package:tidyr’:
complete
The following objects are masked from ‘package:base’:
cbind, rbind
# Reading the dataset
nbi <- read.csv('../../../data/trees/decision-tree-dataset/decision_tree.csv')
# Select attributesk
df <- nbi %>% dplyr::select(adt.cat, adtt.cat, material, state, structure.number, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention, deck.intervention.in.next.3.years, sub.intervention.in.next.3.years, super.intervention.in.next.3.years, precipitation, snowfall, freezethaw, score)
Dataset to model deck of the bridges
# Select attributes
df_deck <- nbi %>% dplyr::select(adt.cat, adtt.cat, material, structure.type, type.of.wearing.surface, current.deck, current.substructure, current.superstructure, total.deck.intervention, total.sub.intervention, total.super.intervention, precipitation, snowfall, freezethaw, score, deck.intervention.in.next.3.years)
# Remove null values
df_deck <- na.omit(df_deck)
Preview of the data
head(df_deck)
Training and testing Deck
target_variable <- 'deck.intervention.in.next.3.years'
index = createDataPartition(y=df_deck[[target_variable]], p=0.7, list=FALSE)
train.set = df_deck[index,]
test.set = df_deck[-index,]
positive_class = 'No'
negative_class = 'Yes'
reset.seed <- function()
{
# ensure results are repeatable
set.seed(1337)
}
library(doParallel)
num_cores <- detectCores() #note: you can specify a smaller number if you want
cl <- makePSOCKcluster(num_cores)
registerDoParallel(cl)
reset.seed()
model <- deck.intervention.in.next.3.years ~ adt.cat + adtt.cat + material + structure.type + type.of.wearing.surface + current.deck + current.substructure + current.superstructure + total.deck.intervention + total.sub.intervention + total.super.intervention + precipitation + snowfall + freezethaw + score
tunelengths = seq(from=5, to=10, by=5)
list_sens <- c()
list_spec <- c()
list_f1 <-c()
list_tl <-c()
list_kappa <-c()
list_auc <- c()
probabilities_dt <- data.frame(No=double(), Yes=double())
for(tl in tunelengths) {
rtree_model = train( model,
data = train.set,
method = "rpart",
trControl = trainControl(method = "repeatedcv", search = 'random', repeats = 5,
summaryFunction = twoClassSummary,
classProbs = T, savePredictions = T), tuneLength = tl,
metric='ROC')
rtree_model
# Predict on the training set
tree_class_test <- rtree_model%>% predict(newdata = test.set, type = 'raw')
tree_prob_test <- rtree_model%>% predict(newdata = test.set, type = 'prob')
# Confusion Matrix
metrics <- confusionMatrix(tree_class_test, test.set[[target_variable]])
metricsbyclass <- metrics$byClass
sens <- type.convert(metricsbyclass[1])
list_sens <- c(list_sens, sens)
spec <- type.convert(metricsbyclass[2])
list_spec <- c(list_spec, spec)
f1 <- type.convert(metricsbyclass[7])
list_f1 <- c(list_f1, f1)
tunelen <- tl
list_tl <- c(list_tl, tunelen)
kappa <- type.convert(metrics$overall[2])
list_kappa <- c(list_kappa, kappa)
# Confusion matrix
area <- roc(test.set[[target_variable]], tree_prob_test[[positive_class]], plot = TRUE, print.auc = TRUE, legacy.axes = TRUE)
list_auc <- c(list_auc, area$auc)
# rpart model
rpart.plot(rtree_model$finalModel)
filename <-paste('../../../data/trees/models/deck/tree-deck-no-undersample', toString(tl), '.csv', collapse = '')
write.csv(rpart.rules(rtree_model$finalModel, roundint=FALSE, clip.facs=TRUE), filename)
# Probability values
tree_prob_test['Tunelength'] <- rep(tl, length(tree_class_test))
tree_prob_test['StructureNumber'] <- df[rownames(tree_prob_test),]$structure.number
# Concatenate
probabilities_dt <- bind_rows(probabilities_dt, tree_prob_test)
# metrics
metrics
}
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 18 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 17 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 16 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 15 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 14 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 13 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 12 (<-localhost:11814)
Warning in .Internal(gc(verbose, reset, full)) :
closing unused connection 11 (<-localhost:11814)
Setting levels: control = No, case = Yes
Setting direction: controls > cases




df_metric_dt <- data.frame(list_sens, list_spec, list_f1, list_tl, list_kappa, list_auc)
names(df_metric_dt) <- c('Sensitivity', 'Specificity', 'F1','Tunelength', 'Kappa', 'AUC')
# Writing outputs
write.csv(probabilities_dt, '../../../data/trees/metrics/dt/tree-prob-deck-nou.csv')
write.csv(probabilities_dt, '../../../data/trees/metrics/dt/tree-metric-deck-nou.csv')
write.csv(df_metric_dt, '../../../data/trees/metrics/dt/tree-metric-deck-nou.csv')
df_metric_dt
probabilities_dt
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKYXV0aG9yOiBBa3NoYXkgS2FsZQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZGVzY3JpcHRpb246IERlY2lzaW9uIHRyZWUgbW9kZWxpbmcgb2YgZGVjayB3aXRob3V0IHVuZGVyc2FtcGxpbmcuCi0tLQoKIyMjIyAqTGlicmFyaWVzKgpgYGB7cn0KbGlicmFyeShsYXR0aWNlKQpsaWJyYXJ5KElTTFIpCmxpYnJhcnkoTUFTUykKbGlicmFyeShjYXJldCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocnBhcnQpCmxpYnJhcnkocGx5cik7IGxpYnJhcnkoZHBseXIpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkocmF0dGxlKSAgICAgICAgICAgICAgICAgIyBGYW5jeSB0cmVlIHBsb3QKbGlicmFyeShycGFydC5wbG90KSAKbGlicmFyeShkcGx5cikKbGlicmFyeShwYXJhbGxlbCkKbGlicmFyeShIbWlzYykKbGlicmFyeShlMTA3MSkKbGlicmFyeShwUk9DKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocnBhcnQucGxvdCkKbGlicmFyeShWSU0pCmxpYnJhcnkobWljZSkKCiMgUmVhZGluZyB0aGUgZGF0YXNldApuYmkgPC0gcmVhZC5jc3YoJy4uLy4uLy4uL2RhdGEvdHJlZXMvZGVjaXNpb24tdHJlZS1kYXRhc2V0L2RlY2lzaW9uX3RyZWUuY3N2JykKCiMgU2VsZWN0IGF0dHJpYnV0ZXNrCmRmIDwtIG5iaSAlPiUgZHBseXI6OnNlbGVjdChhZHQuY2F0LCBhZHR0LmNhdCwgbWF0ZXJpYWwsIHN0YXRlLCBzdHJ1Y3R1cmUubnVtYmVyLCBzdHJ1Y3R1cmUudHlwZSwgdHlwZS5vZi53ZWFyaW5nLnN1cmZhY2UsIGN1cnJlbnQuZGVjaywgY3VycmVudC5zdWJzdHJ1Y3R1cmUsIGN1cnJlbnQuc3VwZXJzdHJ1Y3R1cmUsIHRvdGFsLmRlY2suaW50ZXJ2ZW50aW9uLCB0b3RhbC5zdWIuaW50ZXJ2ZW50aW9uLCB0b3RhbC5zdXBlci5pbnRlcnZlbnRpb24sIGRlY2suaW50ZXJ2ZW50aW9uLmluLm5leHQuMy55ZWFycywgc3ViLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMsIHN1cGVyLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMsIHByZWNpcGl0YXRpb24sIHNub3dmYWxsLCBmcmVlemV0aGF3LCBzY29yZSkKYGBgCgojIyMjICpEYXRhc2V0IHRvIG1vZGVsIGRlY2sgb2YgdGhlIGJyaWRnZXMqCmBgYHtyfQojIFNlbGVjdCBhdHRyaWJ1dGVzCmRmX2RlY2sgPC0gbmJpICU+JSBkcGx5cjo6c2VsZWN0KGFkdC5jYXQsIGFkdHQuY2F0LCBtYXRlcmlhbCwgc3RydWN0dXJlLnR5cGUsIHR5cGUub2Yud2VhcmluZy5zdXJmYWNlLCBjdXJyZW50LmRlY2ssIGN1cnJlbnQuc3Vic3RydWN0dXJlLCBjdXJyZW50LnN1cGVyc3RydWN0dXJlLCB0b3RhbC5kZWNrLmludGVydmVudGlvbiwgdG90YWwuc3ViLmludGVydmVudGlvbiwgdG90YWwuc3VwZXIuaW50ZXJ2ZW50aW9uLCAgcHJlY2lwaXRhdGlvbiwgc25vd2ZhbGwsIGZyZWV6ZXRoYXcsIHNjb3JlLCBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMpCgojIFJlbW92ZSBudWxsIHZhbHVlcwpkZl9kZWNrIDwtIG5hLm9taXQoZGZfZGVjaykKYGBgCgojIyMjICpQcmV2aWV3IG9mIHRoZSBkYXRhKgpgYGB7cn0KaGVhZChkZl9kZWNrKQpgYGAKCiMjIyMgKlRyYWluaW5nIGFuZCB0ZXN0aW5nIERlY2sqCmBgYHtyfQp0YXJnZXRfdmFyaWFibGUgPC0gJ2RlY2suaW50ZXJ2ZW50aW9uLmluLm5leHQuMy55ZWFycycKaW5kZXggPSBjcmVhdGVEYXRhUGFydGl0aW9uKHk9ZGZfZGVja1tbdGFyZ2V0X3ZhcmlhYmxlXV0sIHA9MC43LCBsaXN0PUZBTFNFKQp0cmFpbi5zZXQgPSBkZl9kZWNrW2luZGV4LF0KdGVzdC5zZXQgPSBkZl9kZWNrWy1pbmRleCxdCgpwb3NpdGl2ZV9jbGFzcyA9ICdObycKbmVnYXRpdmVfY2xhc3MgPSAnWWVzJyAKCnJlc2V0LnNlZWQgPC0gZnVuY3Rpb24oKQp7CiAgIyBlbnN1cmUgcmVzdWx0cyBhcmUgcmVwZWF0YWJsZQogIHNldC5zZWVkKDEzMzcpCn0KbGlicmFyeShkb1BhcmFsbGVsKQpudW1fY29yZXMgPC0gZGV0ZWN0Q29yZXMoKSAjbm90ZTogeW91IGNhbiBzcGVjaWZ5IGEgc21hbGxlciBudW1iZXIgaWYgeW91IHdhbnQKY2wgPC0gbWFrZVBTT0NLY2x1c3RlcihudW1fY29yZXMpCnJlZ2lzdGVyRG9QYXJhbGxlbChjbCkKCnJlc2V0LnNlZWQoKQptb2RlbCA8LSBkZWNrLmludGVydmVudGlvbi5pbi5uZXh0LjMueWVhcnMgfiBhZHQuY2F0ICsgYWR0dC5jYXQgKyBtYXRlcmlhbCArIHN0cnVjdHVyZS50eXBlICsgdHlwZS5vZi53ZWFyaW5nLnN1cmZhY2UgKyBjdXJyZW50LmRlY2sgKyBjdXJyZW50LnN1YnN0cnVjdHVyZSArIGN1cnJlbnQuc3VwZXJzdHJ1Y3R1cmUgKyB0b3RhbC5kZWNrLmludGVydmVudGlvbiArIHRvdGFsLnN1Yi5pbnRlcnZlbnRpb24gKyB0b3RhbC5zdXBlci5pbnRlcnZlbnRpb24gKyAgcHJlY2lwaXRhdGlvbiArIHNub3dmYWxsICsgZnJlZXpldGhhdyArIHNjb3JlCgp0dW5lbGVuZ3RocyA9IHNlcShmcm9tPTUsIHRvPTEwMCwgYnk9NSkKbGlzdF9zZW5zIDwtIGMoKQpsaXN0X3NwZWMgPC0gYygpCmxpc3RfZjEgPC1jKCkKbGlzdF90bCA8LWMoKQpsaXN0X2thcHBhIDwtYygpCmxpc3RfYXVjIDwtIGMoKQpwcm9iYWJpbGl0aWVzX2R0IDwtIGRhdGEuZnJhbWUoTm89ZG91YmxlKCksIFllcz1kb3VibGUoKSkKCmZvcih0bCBpbiB0dW5lbGVuZ3RocykgewpydHJlZV9tb2RlbCA9IHRyYWluKCBtb2RlbCwKICAgICAgICAgICAgICAgICBkYXRhID0gdHJhaW4uc2V0LAogICAgICAgICAgICAgICAgIG1ldGhvZCA9ICJycGFydCIsCiAgICAgICAgICAgICAgICAgdHJDb250cm9sID0gdHJhaW5Db250cm9sKG1ldGhvZCA9ICJyZXBlYXRlZGN2Iiwgc2VhcmNoID0gJ3JhbmRvbScsIHJlcGVhdHMgPSA1LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdW1tYXJ5RnVuY3Rpb24gPSB0d29DbGFzc1N1bW1hcnksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGNsYXNzUHJvYnMgPSBULCBzYXZlUHJlZGljdGlvbnMgPSBUKSwgIHR1bmVMZW5ndGggPSB0bCwKICAgICAgICAgICAgICAgICBtZXRyaWM9J1JPQycpCgoKcnRyZWVfbW9kZWwKCiMgUHJlZGljdCBvbiB0aGUgdHJhaW5pbmcgc2V0CnRyZWVfY2xhc3NfdGVzdCA8LSBydHJlZV9tb2RlbCU+JSBwcmVkaWN0KG5ld2RhdGEgPSB0ZXN0LnNldCwgdHlwZSA9ICdyYXcnKQp0cmVlX3Byb2JfdGVzdCA8LSBydHJlZV9tb2RlbCU+JSBwcmVkaWN0KG5ld2RhdGEgPSB0ZXN0LnNldCwgdHlwZSA9ICdwcm9iJykKCiMgQ29uZnVzaW9uIE1hdHJpeAptZXRyaWNzIDwtIGNvbmZ1c2lvbk1hdHJpeCh0cmVlX2NsYXNzX3Rlc3QsIHRlc3Quc2V0W1t0YXJnZXRfdmFyaWFibGVdXSkKbWV0cmljc2J5Y2xhc3MgPC0gbWV0cmljcyRieUNsYXNzCgpzZW5zIDwtIHR5cGUuY29udmVydChtZXRyaWNzYnljbGFzc1sxXSkKbGlzdF9zZW5zIDwtIGMobGlzdF9zZW5zLCBzZW5zKQoKc3BlYyA8LSB0eXBlLmNvbnZlcnQobWV0cmljc2J5Y2xhc3NbMl0pCmxpc3Rfc3BlYyA8LSBjKGxpc3Rfc3BlYywgc3BlYykKCmYxIDwtIHR5cGUuY29udmVydChtZXRyaWNzYnljbGFzc1s3XSkKbGlzdF9mMSA8LSBjKGxpc3RfZjEsIGYxKQoKdHVuZWxlbiA8LSB0bApsaXN0X3RsIDwtIGMobGlzdF90bCwgdHVuZWxlbikKCmthcHBhIDwtIHR5cGUuY29udmVydChtZXRyaWNzJG92ZXJhbGxbMl0pCmxpc3Rfa2FwcGEgPC0gYyhsaXN0X2thcHBhLCBrYXBwYSkKCiMgQ29uZnVzaW9uIG1hdHJpeAphcmVhIDwtIHJvYyh0ZXN0LnNldFtbdGFyZ2V0X3ZhcmlhYmxlXV0sIHRyZWVfcHJvYl90ZXN0W1twb3NpdGl2ZV9jbGFzc11dLCBwbG90ID0gVFJVRSwgcHJpbnQuYXVjID0gVFJVRSwgbGVnYWN5LmF4ZXMgPSBUUlVFKQpsaXN0X2F1YyA8LSBjKGxpc3RfYXVjLCBhcmVhJGF1YykKCiMgcnBhcnQgbW9kZWwKcnBhcnQucGxvdChydHJlZV9tb2RlbCRmaW5hbE1vZGVsKQpmaWxlbmFtZSA8LXBhc3RlKCcuLi8uLi8uLi9kYXRhL3RyZWVzL21vZGVscy9kZWNrL3RyZWUtZGVjay1uby11bmRlcnNhbXBsZScsIHRvU3RyaW5nKHRsKSwgJy5jc3YnLCBjb2xsYXBzZSA9ICcnKQp3cml0ZS5jc3YocnBhcnQucnVsZXMocnRyZWVfbW9kZWwkZmluYWxNb2RlbCwgcm91bmRpbnQ9RkFMU0UsIGNsaXAuZmFjcz1UUlVFKSwgZmlsZW5hbWUpCgojIFByb2JhYmlsaXR5IHZhbHVlcwp0cmVlX3Byb2JfdGVzdFsnVHVuZWxlbmd0aCddIDwtIHJlcCh0bCwgbGVuZ3RoKHRyZWVfY2xhc3NfdGVzdCkpCnRyZWVfcHJvYl90ZXN0WydTdHJ1Y3R1cmVOdW1iZXInXSA8LSBkZltyb3duYW1lcyh0cmVlX3Byb2JfdGVzdCksXSRzdHJ1Y3R1cmUubnVtYmVyCgojIENvbmNhdGVuYXRlCnByb2JhYmlsaXRpZXNfZHQgPC0gYmluZF9yb3dzKHByb2JhYmlsaXRpZXNfZHQsIHRyZWVfcHJvYl90ZXN0KQoKIyBtZXRyaWNzCm1ldHJpY3MKfQoKZGZfbWV0cmljX2R0IDwtIGRhdGEuZnJhbWUobGlzdF9zZW5zLCBsaXN0X3NwZWMsIGxpc3RfZjEsIGxpc3RfdGwsIGxpc3Rfa2FwcGEsIGxpc3RfYXVjKQpuYW1lcyhkZl9tZXRyaWNfZHQpIDwtIGMoJ1NlbnNpdGl2aXR5JywgJ1NwZWNpZmljaXR5JywgJ0YxJywnVHVuZWxlbmd0aCcsICdLYXBwYScsICdBVUMnKQoKIyBXcml0aW5nIG91dHB1dHMKd3JpdGUuY3N2KHByb2JhYmlsaXRpZXNfZHQsICcuLi8uLi8uLi9kYXRhL3RyZWVzL21ldHJpY3MvZHQvdHJlZS1wcm9iLWRlY2stbm91LmNzdicpCndyaXRlLmNzdihkZl9tZXRyaWNfZHQsICcuLi8uLi8uLi9kYXRhL3RyZWVzL21ldHJpY3MvZHQvdHJlZS1tZXRyaWMtZGVjay1ub3UuY3N2JykKCmRmX21ldHJpY19kdApwcm9iYWJpbGl0aWVzX2R0CmBgYAoK